Run download_data.Rmd and percentage_of_regional_richness.Rmd first!
city_data
# Load the per-city richness intercepts for one species pool and attach them
# to the city covariates.
#
# Reads "<pool_name>_city_richness_intercept.csv" (relative to the working
# directory), left-joins it onto the global `city_data` by city name, and
# returns the response together with the fixed list of candidate predictors.
#
# Arguments:
#   pool_name          Prefix of both the results file and the
#                      "<pool_name>_pool_size" column, e.g. 'merlin'.
#   include_city_name  When TRUE the "name" column is returned as the first
#                      column; by default it is dropped.
#
# Returns: `city_data` joined with the results, restricted to the required
# columns (every city in `city_data` is kept; unmatched cities get an NA
# response because this is a left join).
fetch_city_data_for <- function(pool_name, include_city_name = FALSE) {
  results_filename <- paste0(paste(pool_name, "city", "richness", "intercept", sep = "_"), ".csv")
  results <- read_csv(results_filename)

  # Join on the city name explicitly rather than on whatever columns the two
  # tables happen to share, so an accidental extra common column cannot
  # silently change the match.
  joined <- left_join(city_data, results, by = "name")

  pool_size_col_name <- paste(pool_name, "pool", "size", sep = "_")

  required_columns <- c(
    "response",
    pool_size_col_name,
    "population_growth",
    "rainfall_monthly_min",
    "rainfall_annual_average",
    "rainfall_monthly_max",
    "temperature_annual_average",
    "temperature_monthly_min",
    "temperature_monthly_max",
    "happiness_negative_effect",
    "happiness_positive_effect",
    "happiness_future_life",
    "number_of_biomes",
    "realm",
    "biome_name",
    "region_20km_includes_estuary",
    "region_50km_includes_estuary",
    "region_100km_includes_estuary",
    "city_includes_estuary",
    "region_20km_average_pop_density",
    "region_50km_average_pop_density",
    "region_100km_average_pop_density",
    "city_max_pop_density",
    "city_average_pop_density",
    "mean_population_exposure_to_pm2_5_2019",
    "region_20km_cultivated",
    "region_20km_urban",
    "region_50km_cultivated",
    "region_50km_urban",
    "region_100km_cultivated",
    "region_100km_urban",
    "region_20km_elevation_delta",
    "region_20km_mean_elevation",
    "region_50km_elevation_delta",
    "region_50km_mean_elevation",
    "region_100km_elevation_delta",
    "region_100km_mean_elevation",
    "city_elevation_delta",
    "city_mean_elevation",
    "urban",
    "shrubs",
    "permanent_water",
    "open_forest",
    "herbaceous_wetland",
    "herbaceous_vegetation",
    "cultivated",
    "closed_forest",
    "share_of_population_within_400m_of_open_space",
    "percentage_urban_area_as_streets",
    "percentage_urban_area_as_open_public_spaces_and_streets",
    "percentage_urban_area_as_open_public_spaces",
    "city_gdp_per_population",
    "city_ndvi",
    "city_ssm",
    "city_susm",
    "region_20km_ndvi",
    "region_20km_ssm",
    "region_20km_susm",
    "region_50km_ndvi",
    "region_50km_ssm",
    "region_50km_susm",
    "region_100km_ndvi",
    "region_100km_ssm",
    "region_100km_susm",
    "city_percentage_protected",
    "region_20km_percentage_protected",
    "region_50km_percentage_protected",
    "region_100km_percentage_protected"
  )

  if (include_city_name) {
    required_columns <- c("name", required_columns)
  }

  joined[, required_columns]
}
# City covariates joined with the 'merlin' pool responses (city name dropped,
# since include_city_name is left at its default).
merlin_city_data <- fetch_city_data_for('merlin')
── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
name = col_character(),
response = col_double()
)
Joining, by = "name"
merlin_city_data
library(randomForest)
library(reshape2)
library(rpart)
library(ggplot2)
library(tidyverse)
library(multcomp)
Loading required package: mvtnorm
Loading required package: survival
Attaching package: ‘survival’
The following object is masked from ‘package:boot’:
aml
Loading required package: TH.data
Loading required package: MASS
Attaching package: ‘MASS’
The following object is masked from ‘package:dplyr’:
select
Attaching package: ‘TH.data’
The following object is masked from ‘package:MASS’:
geyser
# Fill in missing predictor values with randomForest::rfImpute, using
# `response` as the outcome and every other column as a predictor. The table
# printed below is rfImpute's out-of-bag error report, one row per iteration.
merlin_city_data_fixed <- rfImpute(response ~ ., merlin_city_data)
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 16.9 93.78 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 17.12 94.95 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 17.13 95.01 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 17.35 96.27 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 16.71 92.71 |
merlin_city_data_fixed
source('./random_forest_selection_functions.R')
# Build the column name of a regional variable at one buffer scale,
# e.g. scale 20 with postscript 'urban' gives "region_20km_urban".
scale_parameter_name <- function(scale, postscript) {
  scale_label <- paste0(scale, 'km')
  paste('region', scale_label, postscript, sep = '_')
}
# Column names of one regional variable at every buffer scale, in the order
# 20 km, 50 km, 100 km.
scale_parameters <- function(postscript) {
  available_scales <- c(20, 50, 100)
  unlist(lapply(available_scales, function(km) scale_parameter_name(km, postscript)))
}
# Column names of one regional variable at every scale except
# `scale_to_exclude`. An NA scale matches none of the real column names, so
# all three scales are returned in that case.
scales_parameters_without <- function(scale_to_exclude, postscript) {
  excluded_name <- scale_parameter_name(scale_to_exclude, postscript)
  all_names <- scale_parameters(postscript)
  is_kept <- all_names != excluded_name
  all_names[is_kept]
}
# For each multi-scale regional variable, take the scale that was chosen
# (20, 50 or 100) and return the column names of the scales that were NOT
# chosen. Pass NA for a variable to get all three of its scale columns back.
#
# Each argument is the chosen scale for the variable of the same name.
# The formals were previously misspelled as `ndv` and `percentage_protectedi`
# while the body and the callers used `ndvi` and `percentage_protected`, so a
# call with `ndvi = ...` was rejected as an unused argument.
#
# Returns: a character vector of "region_<scale>km_<variable>" column names,
# grouped by variable in the order of the arguments.
select_scales <- function(urban, cultivated, elevation_delta, mean_elevation, average_pop_density, includes_estuary, ssm, susm, ndvi, percentage_protected) {
  c(
    scales_parameters_without(scale_to_exclude = urban, postscript = 'urban'),
    scales_parameters_without(scale_to_exclude = cultivated, postscript = 'cultivated'),
    scales_parameters_without(scale_to_exclude = elevation_delta, postscript = 'elevation_delta'),
    scales_parameters_without(scale_to_exclude = mean_elevation, postscript = 'mean_elevation'),
    scales_parameters_without(scale_to_exclude = average_pop_density, postscript = 'average_pop_density'),
    scales_parameters_without(scale_to_exclude = includes_estuary, postscript = 'includes_estuary'),
    scales_parameters_without(scale_to_exclude = ssm, postscript = 'ssm'),
    scales_parameters_without(scale_to_exclude = susm, postscript = 'susm'),
    scales_parameters_without(scale_to_exclude = ndvi, postscript = 'ndvi'),
    scales_parameters_without(scale_to_exclude = percentage_protected, postscript = 'percentage_protected')
  )
}
# Example: list the regional columns left over once one scale per variable is
# chosen; NA means no scale is chosen, so all three columns are listed.
select_scales(urban = 20, cultivated = 100, elevation_delta = 20, mean_elevation = 100, average_pop_density = NA, includes_estuary = NA, ssm = 20, susm = 20, ndvi = 100, percentage_protected = NA)
[1] "region_50km_urban" "region_100km_urban" "region_20km_cultivated" "region_50km_cultivated" "region_50km_elevation_delta"
[6] "region_100km_elevation_delta" "region_20km_mean_elevation" "region_50km_mean_elevation" "region_20km_average_pop_density" "region_50km_average_pop_density"
[11] "region_100km_average_pop_density" "region_20km_includes_estuary" "region_50km_includes_estuary" "region_100km_includes_estuary" "region_50km_ssm"
[16] "region_100km_ssm" "region_50km_susm" "region_100km_susm" "region_20km_ndvi" "region_50km_ndvi"
[21] "region_20km_percentage_protected" "region_50km_percentage_protected" "region_100km_percentage_protected"
# Template call: fill in a scale (20, 50, 100, or NA) for every argument
# before running. NOTE(review): as written, with all arguments empty, this
# cannot produce a result.
select_scales(urban = , cultivated = , elevation_delta = , mean_elevation = , average_pop_density = , includes_estuary = , ssm = , susm = , ndvi =, percentage_protected = )
select_variables_from_random_forest(merlin_city_data_fixed)
[1] "merlin_pool_size" "biome_name" "realm"
[4] "region_100km_ssm" "region_50km_ssm" "temperature_annual_average"
[7] "region_50km_elevation_delta" "temperature_monthly_min" "region_20km_elevation_delta"
[10] "region_20km_urban" "region_50km_susm" "region_50km_urban"
[13] "rainfall_monthly_min" "permanent_water" "region_100km_elevation_delta"
[16] "region_100km_cultivated" "shrubs" "region_20km_cultivated"
[19] "city_gdp_per_population" "share_of_population_within_400m_of_open_space" "region_20km_ndvi"
[22] "happiness_positive_effect" "herbaceous_wetland" "region_50km_average_pop_density"
[25] "region_50km_cultivated" "city_percentage_protected" "region_20km_average_pop_density"
[28] "region_100km_urban" "city_ndvi" "temperature_monthly_max"
[31] "happiness_future_life" "region_100km_average_pop_density" "rainfall_monthly_max"
[34] "city_average_pop_density" "city_max_pop_density" "mean_population_exposure_to_pm2_5_2019"
[37] "region_20km_susm" "region_20km_ssm" "city_susm"
[40] "region_50km_percentage_protected" "region_100km_susm" "region_100km_percentage_protected"
[43] "region_50km_ndvi" "city_elevation_delta" "city_mean_elevation"
[46] "region_20km_percentage_protected" "rainfall_annual_average" "percentage_urban_area_as_open_public_spaces_and_streets"
[49] "herbaceous_vegetation" "urban" "region_100km_ndvi"
[52] "cultivated" "region_20km_mean_elevation" "city_ssm"
[55] "region_100km_mean_elevation" "region_50km_mean_elevation" "population_growth"
[58] "percentage_urban_area_as_streets" "happiness_negative_effect" "percentage_urban_area_as_open_public_spaces"
[61] "closed_forest" "open_forest"
select_variables_from_random_forest(merlin_city_data_fixed_single_scale)
[1] "merlin_pool_size" "biome_name" "realm"
[4] "region_100km_ssm" "temperature_annual_average" "temperature_monthly_min"
[7] "region_50km_elevation_delta" "rainfall_monthly_min" "permanent_water"
[10] "region_50km_susm" "region_20km_ndvi" "region_20km_urban"
[13] "shrubs" "city_gdp_per_population" "happiness_positive_effect"
[16] "city_percentage_protected" "region_100km_cultivated" "share_of_population_within_400m_of_open_space"
[19] "rainfall_monthly_max" "city_ndvi" "temperature_monthly_max"
[22] "city_average_pop_density" "region_50km_average_pop_density" "rainfall_annual_average"
[25] "city_mean_elevation" "region_50km_percentage_protected" "percentage_urban_area_as_open_public_spaces_and_streets"
[28] "urban" "cultivated" "percentage_urban_area_as_open_public_spaces"
[31] "happiness_negative_effect" "region_20km_mean_elevation" "city_susm"
[34] "city_ssm" "percentage_urban_area_as_streets" "closed_forest"
[37] "open_forest"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size")])
[1] "Mean 18.3291184364462 , SD: 0.239909691174696 , Mean + SD: 18.5690281276209"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name")])
[1] "Mean 15.8873631581584 , SD: 0.180232366982089 , Mean + SD: 16.0675955251404"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm")])
[1] "Mean 14.085087335964 , SD: 0.181457368755455 , Mean + SD: 14.2665447047195"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm", "region_100km_ssm")])
[1] "Mean 14.7500741158013 , SD: 0.199005244777679 , Mean + SD: 14.949079360579"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm", "region_100km_ssm", "temperature_annual_average")])
[1] "Mean 15.0736086564802 , SD: 0.233605527261414 , Mean + SD: 15.3072141837417"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm", "region_100km_ssm", "temperature_annual_average", "temperature_monthly_min")])
[1] "Mean 15.2264915835863 , SD: 0.193099894838028 , Mean + SD: 15.4195914784244"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm", "region_100km_ssm", "temperature_annual_average", "temperature_monthly_min", "region_50km_elevation_delta")])
[1] "Mean 15.2378145280404 , SD: 0.248172547588966 , Mean + SD: 15.4859870756294"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm", "region_100km_ssm", "temperature_annual_average", "temperature_monthly_min", "region_50km_elevation_delta", "rainfall_monthly_min")])
[1] "Mean 14.9140396251349 , SD: 0.197619251055124 , Mean + SD: 15.11165887619"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm", "region_100km_ssm", "temperature_annual_average", "temperature_monthly_min", "region_50km_elevation_delta", "rainfall_monthly_min", "permanent_water")])
[1] "Mean 14.8536445422209 , SD: 0.254378052303436 , Mean + SD: 15.1080225945243"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm", "region_100km_ssm", "temperature_annual_average", "temperature_monthly_min", "region_50km_elevation_delta", "rainfall_monthly_min", "permanent_water", "region_50km_susm")])
[1] "Mean 15.5273572141801 , SD: 0.273890093511612 , Mean + SD: 15.8012473076917"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm", "region_100km_ssm", "temperature_annual_average", "temperature_monthly_min", "region_50km_elevation_delta", "rainfall_monthly_min", "permanent_water", "region_50km_susm", "region_20km_ndvi")])
[1] "Mean 15.3186439115578 , SD: 0.227215081868521 , Mean + SD: 15.5458589934263"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm", "region_100km_ssm", "temperature_annual_average", "temperature_monthly_min", "region_50km_elevation_delta", "rainfall_monthly_min", "permanent_water", "region_50km_susm", "region_20km_ndvi", "region_20km_urban")])
[1] "Mean 15.1681755140829 , SD: 0.224510713543485 , Mean + SD: 15.3926862276264"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm", "region_100km_ssm", "temperature_annual_average", "temperature_monthly_min", "region_50km_elevation_delta", "rainfall_monthly_min", "permanent_water", "region_50km_susm", "region_20km_ndvi", "region_20km_urban", "shrubs")])
[1] "Mean 15.1902125119541 , SD: 0.226152938160966 , Mean + SD: 15.4163654501151"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm", "region_100km_ssm", "temperature_annual_average", "temperature_monthly_min", "region_50km_elevation_delta", "rainfall_monthly_min", "permanent_water", "region_50km_susm", "region_20km_ndvi", "region_20km_urban", "shrubs", "city_gdp_per_population")])
[1] "Mean 15.2065363289081 , SD: 0.255119486173744 , Mean + SD: 15.4616558150818"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm", "region_100km_ssm", "temperature_annual_average", "temperature_monthly_min", "region_50km_elevation_delta", "rainfall_monthly_min", "permanent_water", "region_50km_susm", "region_20km_ndvi", "region_20km_urban", "shrubs", "city_gdp_per_population", "happiness_positive_effect")])
[1] "Mean 15.2791920489499 , SD: 0.262306992952203 , Mean + SD: 15.5414990419021"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm", "region_100km_ssm", "temperature_annual_average", "temperature_monthly_min", "region_50km_elevation_delta", "rainfall_monthly_min", "permanent_water", "region_50km_susm", "region_20km_ndvi", "region_20km_urban", "shrubs", "city_gdp_per_population", "happiness_positive_effect", "city_percentage_protected")])
[1] "Mean 15.2653808786396 , SD: 0.243375155516205 , Mean + SD: 15.5087560341558"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm", "region_100km_ssm", "temperature_annual_average", "temperature_monthly_min", "region_50km_elevation_delta", "rainfall_monthly_min", "permanent_water", "region_50km_susm", "region_20km_ndvi", "region_20km_urban", "shrubs", "city_gdp_per_population", "happiness_positive_effect", "city_percentage_protected", "region_100km_cultivated")])
[1] "Mean 15.4694621878868 , SD: 0.281353351110609 , Mean + SD: 15.7508155389974"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm", "region_100km_ssm", "temperature_annual_average", "temperature_monthly_min", "region_50km_elevation_delta", "rainfall_monthly_min", "permanent_water", "region_50km_susm", "region_20km_ndvi", "region_20km_urban", "shrubs", "city_gdp_per_population", "happiness_positive_effect", "city_percentage_protected", "region_100km_cultivated", "share_of_population_within_400m_of_open_space")])
[1] "Mean 15.2754584376954 , SD: 0.245564947611728 , Mean + SD: 15.5210233853072"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm", "region_100km_ssm", "temperature_annual_average", "temperature_monthly_min", "region_50km_elevation_delta", "rainfall_monthly_min", "permanent_water", "region_50km_susm", "region_20km_ndvi", "region_20km_urban", "shrubs", "city_gdp_per_population", "happiness_positive_effect", "city_percentage_protected", "region_100km_cultivated", "share_of_population_within_400m_of_open_space", "rainfall_monthly_max")])
[1] "Mean 15.5025730033436 , SD: 0.254677972901481 , Mean + SD: 15.7572509762451"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm", "region_100km_ssm", "temperature_annual_average", "temperature_monthly_min", "region_50km_elevation_delta", "rainfall_monthly_min", "permanent_water", "region_50km_susm", "region_20km_ndvi", "region_20km_urban", "shrubs", "city_gdp_per_population", "happiness_positive_effect", "city_percentage_protected", "region_100km_cultivated", "share_of_population_within_400m_of_open_space", "rainfall_monthly_max", "city_ndvi")])
[1] "Mean 15.5523641989269 , SD: 0.237597135507919 , Mean + SD: 15.7899613344348"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm", "region_100km_ssm", "temperature_annual_average", "temperature_monthly_min", "region_50km_elevation_delta", "rainfall_monthly_min", "permanent_water", "region_50km_susm", "region_20km_ndvi", "region_20km_urban", "shrubs", "city_gdp_per_population", "happiness_positive_effect", "city_percentage_protected", "region_100km_cultivated", "share_of_population_within_400m_of_open_space", "rainfall_monthly_max", "city_ndvi", "temperature_monthly_max")])
[1] "Mean 15.6384411101055 , SD: 0.220167702046869 , Mean + SD: 15.8586088121524"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm", "region_100km_ssm", "temperature_annual_average", "temperature_monthly_min", "region_50km_elevation_delta", "rainfall_monthly_min", "permanent_water", "region_50km_susm", "region_20km_ndvi", "region_20km_urban", "shrubs", "city_gdp_per_population", "happiness_positive_effect", "city_percentage_protected", "region_100km_cultivated", "share_of_population_within_400m_of_open_space", "rainfall_monthly_max", "city_ndvi", "temperature_monthly_max", "city_average_pop_density")])
[1] "Mean 15.8461770285193 , SD: 0.290074490489128 , Mean + SD: 16.1362515190084"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm", "region_100km_ssm", "temperature_annual_average", "temperature_monthly_min", "region_50km_elevation_delta", "rainfall_monthly_min", "permanent_water", "region_50km_susm", "region_20km_ndvi", "region_20km_urban", "shrubs", "city_gdp_per_population", "happiness_positive_effect", "city_percentage_protected", "region_100km_cultivated", "share_of_population_within_400m_of_open_space", "rainfall_monthly_max", "city_ndvi", "temperature_monthly_max", "city_average_pop_density", "region_50km_average_pop_density")])
[1] "Mean 15.8811819521739 , SD: 0.297186312599319 , Mean + SD: 16.1783682647732"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "biome_name", "realm", "region_100km_ssm", "temperature_annual_average", "temperature_monthly_min", "region_50km_elevation_delta", "rainfall_monthly_min", "permanent_water", "region_50km_susm", "region_20km_ndvi", "region_20km_urban", "shrubs", "city_gdp_per_population", "happiness_positive_effect", "city_percentage_protected", "region_100km_cultivated", "share_of_population_within_400m_of_open_space", "rainfall_monthly_max", "city_ndvi", "temperature_monthly_max", "city_average_pop_density", "region_50km_average_pop_density", "rainfall_annual_average")])
[1] "Mean 15.8259267082361 , SD: 0.239634386030856 , Mean + SD: 16.0655610942669"
Variables kept for the Merlin model (the set with the lowest mean error reported above): “merlin_pool_size”, “biome_name”, “realm”
# City covariates joined with the 'birdlife' pool responses (city name
# dropped, since include_city_name is left at its default).
birdlife_city_data <- fetch_city_data_for('birdlife')
── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
name = col_character(),
response = col_double()
)
Joining, by = "name"
birdlife_city_data
# Fill in missing predictor values with randomForest::rfImpute, using
# `response` as the outcome and every other column as a predictor. The table
# printed below is rfImpute's out-of-bag error report, one row per iteration.
birdlife_city_data_fixed <- rfImpute(response ~ ., birdlife_city_data)
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 5.552 87.89 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 5.512 87.25 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 5.559 88.00 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 5.703 90.27 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 5.738 90.84 |
birdlife_city_data_fixed
select_variables_from_random_forest(birdlife_city_data_fixed)
[1] "population_growth" "birdlife_pool_size" "region_50km_ssm"
[4] "region_100km_ssm" "city_ndvi" "region_100km_cultivated"
[7] "biome_name" "region_50km_cultivated" "temperature_monthly_min"
[10] "region_20km_ssm" "percentage_urban_area_as_open_public_spaces" "region_100km_susm"
[13] "region_20km_susm" "region_20km_average_pop_density" "permanent_water"
[16] "rainfall_monthly_min" "region_50km_susm" "percentage_urban_area_as_open_public_spaces_and_streets"
[19] "rainfall_monthly_max" "city_ssm" "region_50km_average_pop_density"
[22] "region_100km_urban" "region_100km_ndvi" "region_50km_ndvi"
[25] "temperature_annual_average" "percentage_urban_area_as_streets" "region_20km_ndvi"
[28] "share_of_population_within_400m_of_open_space" "mean_population_exposure_to_pm2_5_2019" "region_100km_average_pop_density"
[31] "region_20km_cultivated" "city_average_pop_density" "realm"
[34] "region_20km_urban" "region_20km_elevation_delta" "city_susm"
[37] "shrubs" "region_50km_elevation_delta" "rainfall_annual_average"
[40] "happiness_future_life" "region_50km_urban" "region_100km_percentage_protected"
[43] "city_max_pop_density" "region_100km_mean_elevation" "city_elevation_delta"
[46] "region_20km_percentage_protected" "city_mean_elevation" "region_50km_mean_elevation"
[49] "happiness_negative_effect" "happiness_positive_effect" "region_20km_mean_elevation"
[52] "closed_forest" "region_100km_elevation_delta" "urban"
[55] "city_gdp_per_population" "herbaceous_vegetation" "city_percentage_protected"
[58] "open_forest" "cultivated" "temperature_monthly_max"
select_variables_from_random_forest(birdlife_city_data_fixed_single_scale)
[1] "population_growth" "region_50km_ssm" "birdlife_pool_size"
[4] "biome_name" "region_100km_cultivated" "city_ndvi"
[7] "temperature_monthly_min" "percentage_urban_area_as_open_public_spaces" "rainfall_monthly_min"
[10] "region_100km_susm" "region_20km_average_pop_density" "city_ssm"
[13] "permanent_water" "rainfall_monthly_max" "region_100km_urban"
[16] "temperature_annual_average" "percentage_urban_area_as_open_public_spaces_and_streets" "region_20km_elevation_delta"
[19] "share_of_population_within_400m_of_open_space" "shrubs" "mean_population_exposure_to_pm2_5_2019"
[22] "city_average_pop_density" "percentage_urban_area_as_streets" "rainfall_annual_average"
[25] "city_susm" "region_100km_ndvi" "happiness_future_life"
[28] "happiness_negative_effect" "closed_forest" "urban"
[31] "city_mean_elevation" "open_forest" "happiness_positive_effect"
[34] "temperature_monthly_max" "herbaceous_vegetation"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth")])
[1] "Mean 6.35732539091931 , SD: 0.0669192246249271 , Mean + SD: 6.42424461554424"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm")])
[1] "Mean 4.84660140753839 , SD: 0.0809873063808153 , Mean + SD: 4.9275887139192"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size")])
[1] "Mean 4.59720750518497 , SD: 0.0847385212441304 , Mean + SD: 4.6819460264291"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size", "biome_name")])
[1] "Mean 4.79953544345078 , SD: 0.0727373010440456 , Mean + SD: 4.87227274449482"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size", "biome_name", "region_100km_cultivated")])
[1] "Mean 4.89601872208129 , SD: 0.0740660057383079 , Mean + SD: 4.9700847278196"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size", "biome_name", "region_100km_cultivated", "city_ndvi")])
[1] "Mean 4.83770562806036 , SD: 0.0924105715906294 , Mean + SD: 4.93011619965098"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size", "biome_name", "region_100km_cultivated", "city_ndvi", "temperature_monthly_min")])
[1] "Mean 4.8057373947522 , SD: 0.0844941778031387 , Mean + SD: 4.89023157255534"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size", "biome_name", "region_100km_cultivated", "city_ndvi", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces")])
[1] "Mean 4.83173065533811 , SD: 0.0874008189762507 , Mean + SD: 4.91913147431437"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size", "biome_name", "region_100km_cultivated", "city_ndvi", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min")])
[1] "Mean 4.79148086117739 , SD: 0.0706327485527387 , Mean + SD: 4.86211360973013"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size", "biome_name", "region_100km_cultivated", "city_ndvi", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min", "region_100km_susm")])
[1] "Mean 4.91470140038153 , SD: 0.0885243062505973 , Mean + SD: 5.00322570663213"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size", "biome_name", "region_100km_cultivated", "city_ndvi", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min", "region_100km_susm", "region_20km_average_pop_density")])
[1] "Mean 4.90113408062541 , SD: 0.0961754282783113 , Mean + SD: 4.99730950890372"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size", "biome_name", "region_100km_cultivated", "city_ndvi", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min", "region_100km_susm", "region_20km_average_pop_density", "city_ssm")])
[1] "Mean 4.9964341603519 , SD: 0.0768191114888269 , Mean + SD: 5.07325327184073"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size", "biome_name", "region_100km_cultivated", "city_ndvi", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min", "region_100km_susm", "region_20km_average_pop_density", "city_ssm", "permanent_water")])
[1] "Mean 4.97990294565762 , SD: 0.0706673679181856 , Mean + SD: 5.0505703135758"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size", "biome_name", "region_100km_cultivated", "city_ndvi", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min", "region_100km_susm", "region_20km_average_pop_density", "city_ssm", "permanent_water", "rainfall_monthly_max")])
[1] "Mean 4.99512840273475 , SD: 0.0942210618369954 , Mean + SD: 5.08934946457175"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size", "biome_name", "region_100km_cultivated", "city_ndvi", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min", "region_100km_susm", "region_20km_average_pop_density", "city_ssm", "permanent_water", "rainfall_monthly_max", "region_100km_urban")])
[1] "Mean 4.97268472007057 , SD: 0.10396874824783 , Mean + SD: 5.0766534683184"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size", "biome_name", "region_100km_cultivated", "city_ndvi", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min", "region_100km_susm", "region_20km_average_pop_density", "city_ssm", "permanent_water", "rainfall_monthly_max", "region_100km_urban", "temperature_annual_average")])
[1] "Mean 5.04114679155501 , SD: 0.0847207533308739 , Mean + SD: 5.12586754488588"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size", "biome_name", "region_100km_cultivated", "city_ndvi", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min", "region_100km_susm", "region_20km_average_pop_density", "city_ssm", "permanent_water", "rainfall_monthly_max", "region_100km_urban", "temperature_annual_average", "percentage_urban_area_as_open_public_spaces_and_streets")])
[1] "Mean 5.02111793795206 , SD: 0.092420920073587 , Mean + SD: 5.11353885802564"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size", "biome_name", "region_100km_cultivated", "city_ndvi", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min", "region_100km_susm", "region_20km_average_pop_density", "city_ssm", "permanent_water", "rainfall_monthly_max", "region_100km_urban", "temperature_annual_average", "percentage_urban_area_as_open_public_spaces_and_streets", "region_20km_elevation_delta")])
[1] "Mean 5.04087475473546 , SD: 0.0801326834604886 , Mean + SD: 5.12100743819595"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size", "biome_name", "region_100km_cultivated", "city_ndvi", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min", "region_100km_susm", "region_20km_average_pop_density", "city_ssm", "permanent_water", "rainfall_monthly_max", "region_100km_urban", "temperature_annual_average", "percentage_urban_area_as_open_public_spaces_and_streets", "region_20km_elevation_delta", "share_of_population_within_400m_of_open_space")])
[1] "Mean 5.05522409906614 , SD: 0.0925279259536348 , Mean + SD: 5.14775202501977"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "region_50km_ssm", "birdlife_pool_size", "biome_name", "region_100km_cultivated", "city_ndvi", "temperature_monthly_min", "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min", "region_100km_susm", "region_20km_average_pop_density", "city_ssm", "permanent_water", "rainfall_monthly_max", "region_100km_urban", "temperature_annual_average", "percentage_urban_area_as_open_public_spaces_and_streets", "region_20km_elevation_delta", "share_of_population_within_400m_of_open_space", "shrubs")])
[1] "Mean 5.05563861451361 , SD: 0.093232830904438 , Mean + SD: 5.14887144541805"
# Birdlife sweep: every step appends one more column to the set handed to
# create_fifty_rows_of_oob(); the line recorded under each call is its output.
birdlife_oob_columns <- c(
  "response", "population_growth", "region_50km_ssm", "birdlife_pool_size",
  "biome_name", "region_100km_cultivated", "city_ndvi", "temperature_monthly_min",
  "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min",
  "region_100km_susm", "region_20km_average_pop_density", "city_ssm",
  "permanent_water", "rainfall_monthly_max", "region_100km_urban",
  "temperature_annual_average",
  "percentage_urban_area_as_open_public_spaces_and_streets",
  "region_20km_elevation_delta", "share_of_population_within_400m_of_open_space",
  "shrubs", "mean_population_exposure_to_pm2_5_2019"
)
create_fifty_rows_of_oob(birdlife_city_data_fixed[, birdlife_oob_columns])
[1] "Mean 5.05966342869761 , SD: 0.0806786305744567 , Mean + SD: 5.14034205927207"
birdlife_oob_columns <- c(birdlife_oob_columns, "city_average_pop_density")
create_fifty_rows_of_oob(birdlife_city_data_fixed[, birdlife_oob_columns])
[1] "Mean 5.12335838078195 , SD: 0.0778399806226634 , Mean + SD: 5.20119836140461"
birdlife_oob_columns <- c(birdlife_oob_columns, "percentage_urban_area_as_streets")
create_fifty_rows_of_oob(birdlife_city_data_fixed[, birdlife_oob_columns])
[1] "Mean 5.13879647218586 , SD: 0.0845418790784688 , Mean + SD: 5.22333835126433"
birdlife_oob_columns <- c(birdlife_oob_columns, "rainfall_annual_average")
create_fifty_rows_of_oob(birdlife_city_data_fixed[, birdlife_oob_columns])
[1] "Mean 5.17392341305297 , SD: 0.0855140683449177 , Mean + SD: 5.25943748139789"
birdlife_oob_columns <- c(birdlife_oob_columns, "city_susm")
create_fifty_rows_of_oob(birdlife_city_data_fixed[, birdlife_oob_columns])
[1] "Mean 5.24351131619425 , SD: 0.0804843612125177 , Mean + SD: 5.32399567740677"
birdlife_oob_columns <- c(birdlife_oob_columns, "region_100km_ndvi")
create_fifty_rows_of_oob(birdlife_city_data_fixed[, birdlife_oob_columns])
[1] "Mean 5.30537718299019 , SD: 0.0820416774624035 , Mean + SD: 5.38741886045259"
birdlife_oob_columns <- c(birdlife_oob_columns, "happiness_future_life")
create_fifty_rows_of_oob(birdlife_city_data_fixed[, birdlife_oob_columns])
[1] "Mean 5.28047296957072 , SD: 0.104728547457597 , Mean + SD: 5.38520151702831"
Birdlife: “population_growth”, “region_50km_ssm”, “birdlife_pool_size”
So…

- Merlin: “merlin_pool_size”, “biome_name”, “realm”
- Birdlife: “population_growth”, “region_50km_ssm”, “birdlife_pool_size”
# Exploratory scatter plots of the city response. The first four plot response
# against pool size, coloured by one candidate covariate; the last four plot
# response against a single covariate. Each adds a glm smoother without a
# standard-error band. ASCII quotes are required: R does not parse “…”.
ggplot(merlin_city_data_fixed, aes(x = merlin_pool_size, y = response, color = realm)) +
  geom_point() +
  geom_smooth(method = "glm", se = FALSE) +
  theme(legend.position = "bottom")
`geom_smooth()` using formula 'y ~ x'
ggplot(merlin_city_data_fixed, aes(x = merlin_pool_size, y = response, color = biome_name)) +
  geom_point() +
  geom_smooth(method = "glm", se = FALSE) +
  theme(legend.position = "bottom")
`geom_smooth()` using formula 'y ~ x'
ggplot(birdlife_city_data_fixed, aes(x = birdlife_pool_size, y = response, color = region_50km_ssm)) +
  geom_point() +
  geom_smooth(method = "glm", se = FALSE) +
  theme(legend.position = "bottom")
`geom_smooth()` using formula 'y ~ x'
ggplot(birdlife_city_data_fixed, aes(x = birdlife_pool_size, y = response, color = population_growth)) +
  geom_point() +
  geom_smooth(method = "glm", se = FALSE) +
  theme(legend.position = "bottom")
`geom_smooth()` using formula 'y ~ x'
ggplot(merlin_city_data_fixed, aes(y = response, x = population_growth)) +
  geom_point() +
  geom_smooth(method = "glm", se = FALSE)
`geom_smooth()` using formula 'y ~ x'
ggplot(birdlife_city_data_fixed, aes(y = response, x = population_growth)) +
  geom_point() +
  geom_smooth(method = "glm", se = FALSE)
`geom_smooth()` using formula 'y ~ x'
ggplot(merlin_city_data_fixed, aes(y = response, x = region_50km_ssm)) +
  geom_point() +
  geom_smooth(method = "glm", se = FALSE)
`geom_smooth()` using formula 'y ~ x'
ggplot(birdlife_city_data_fixed, aes(y = response, x = region_50km_ssm)) +
  geom_point() +
  geom_smooth(method = "glm", se = FALSE)
`geom_smooth()` using formula 'y ~ x'
library(boot)
# Build copies of both data sets without the rows whose biome_name is
# 'Boreal Forests/Taiga'; these copies feed the test_model() and AIC comparisons.
boreal_biome <- 'Boreal Forests/Taiga'
merlin_not_boreal <- merlin_city_data_fixed$biome_name != boreal_biome
birdlife_not_boreal <- birdlife_city_data_fixed$biome_name != boreal_biome
merlin_city_data_fixed_no_boreal <- merlin_city_data_fixed[merlin_not_boreal, ]
birdlife_city_data_fixed_no_boreal <- birdlife_city_data_fixed[birdlife_not_boreal, ]
# Fit a glm for `formula` on `data` and print three diagnostic lines:
#   1. "R2 <value>"        - 1 - deviance / null.deviance of the fit
#   2. "CV Delta <value>"  - printed for each of the two elements of cv.glm()$delta
#   3. "CV Delta <value>"  - delta[1] - delta[2]
#
# @param data    Data frame containing every variable used in `formula`.
# @param formula Model formula passed to glm() (default family).
# @return Invisibly, the string printed on the third line.
test_model <- function(data, formula) {
  fit <- glm(formula, data = data)
  # Cross-validate once and reuse the result: the original refitted the
  # cross-validation four times, one of which was discarded.
  cv_delta <- cv.glm(data, fit)$delta
  print(paste("R2", with(summary(fit), 1 - deviance/null.deviance)))
  print(paste("CV Delta", cv_delta))
  # Label kept as "CV Delta" so previously recorded output stays comparable.
  print(paste("CV Delta", cv_delta[1] - cv_delta[2]))
}
# Each test_model() call below prints three lines (see its definition):
#   1. "R2 ..."             1 - deviance / null.deviance of the fitted glm
#   2. "CV Delta ..." (x2)  the two elements of cv.glm(...)$delta
#   3. "CV Delta ..." (x1)  delta[1] - delta[2]
# Calls come in Merlin / Birdlife pairs sharing the same right-hand side, all on
# the *_no_boreal data.

# Pool size only
test_model(merlin_city_data_fixed_no_boreal, response ~ merlin_pool_size)
[1] "R2 0.285894381786357"
[1] "CV Delta 13.2924069067549" "CV Delta 13.290989425668"
[1] "CV Delta 0.0014174810869001"
test_model(birdlife_city_data_fixed_no_boreal, response ~ birdlife_pool_size)
[1] "R2 0.132747072834318"
[1] "CV Delta 5.61376539055778" "CV Delta 5.61321468528147"
[1] "CV Delta 0.000550705276301855"
# Pool size + realm
test_model(merlin_city_data_fixed_no_boreal, response ~ merlin_pool_size + realm)
[1] "R2 0.355479718662977"
[1] "CV Delta 13.1013113338907" "CV Delta 13.0956030666681"
[1] "CV Delta 0.00570826722257856"
test_model(birdlife_city_data_fixed_no_boreal, response ~ birdlife_pool_size + realm)
[1] "R2 0.215771844466201"
[1] "CV Delta 5.38032952583311" "CV Delta 5.37866865996081"
[1] "CV Delta 0.00166086587230829"
# Pool size + biome
test_model(merlin_city_data_fixed_no_boreal, response ~ merlin_pool_size + biome_name)
[1] "R2 0.370210675877385"
[1] "CV Delta 13.3828176878773" "CV Delta 13.3745769694536"
[1] "CV Delta 0.00824071842369101"
test_model(birdlife_city_data_fixed_no_boreal, response ~ birdlife_pool_size + biome_name)
[1] "R2 0.223013658291514"
[1] "CV Delta 5.9146455418679" "CV Delta 5.91040383901086"
[1] "CV Delta 0.00424170285703518"
# Pool size + biome + realm
test_model(merlin_city_data_fixed_no_boreal, response ~ merlin_pool_size + biome_name + realm)
[1] "R2 0.404911112981243"
[1] "CV Delta 14.2088898476971" "CV Delta 14.1942054055947"
[1] "CV Delta 0.0146844421024106"
test_model(birdlife_city_data_fixed_no_boreal, response ~ birdlife_pool_size + biome_name + realm)
[1] "R2 0.282011390214033"
[1] "CV Delta 5.61291700874679" "CV Delta 5.60841587721211"
[1] "CV Delta 0.00450113153467768"
# Pool size + population growth
test_model(merlin_city_data_fixed_no_boreal, response ~ merlin_pool_size + population_growth)
[1] "R2 0.286929092142329"
[1] "CV Delta 13.5753866440016" "CV Delta 13.5727869610531"
[1] "CV Delta 0.00259968294847468"
test_model(birdlife_city_data_fixed_no_boreal, response ~ birdlife_pool_size + population_growth)
[1] "R2 0.134196770612853"
[1] "CV Delta 5.73874002430138" "CV Delta 5.73766284305409"
[1] "CV Delta 0.00107718124729139"
# Pool size + population growth + region_50km_ssm
test_model(merlin_city_data_fixed_no_boreal, response ~ merlin_pool_size + population_growth + region_50km_ssm)
[1] "R2 0.290846971284311"
[1] "CV Delta 13.7387732818639" "CV Delta 13.7352820567926"
[1] "CV Delta 0.00349122507122068"
test_model(birdlife_city_data_fixed_no_boreal, response ~ birdlife_pool_size + population_growth + region_50km_ssm)
[1] "R2 0.151946781581196"
[1] "CV Delta 5.6860363390934" "CV Delta 5.68473754059936"
[1] "CV Delta 0.00129879849403824"
# Full model: pool size + population growth + region_50km_ssm + biome + realm
test_model(merlin_city_data_fixed_no_boreal, response ~ merlin_pool_size + population_growth + region_50km_ssm + biome_name + realm)
[1] "R2 0.409838701070553"
[1] "CV Delta 14.5182790115777" "CV Delta 14.5020009668314"
[1] "CV Delta 0.0162780447462367"
test_model(birdlife_city_data_fixed_no_boreal, response ~ birdlife_pool_size + population_growth + region_50km_ssm + biome_name + realm)
[1] "R2 0.287701159199758"
[1] "CV Delta 5.81750910931357" "CV Delta 5.81199090164147"
[1] "CV Delta 0.00551820767210121"
# Pool size + region_50km_ssm + biome + realm
test_model(merlin_city_data_fixed_no_boreal, response ~ merlin_pool_size + region_50km_ssm + biome_name + realm)
[1] "R2 0.408007484714431"
[1] "CV Delta 14.3038421650361" "CV Delta 14.2885697715507"
[1] "CV Delta 0.0152723934853718"
test_model(birdlife_city_data_fixed_no_boreal, response ~ birdlife_pool_size + region_50km_ssm + biome_name + realm)
[1] "R2 0.287698318614449"
[1] "CV Delta 5.66636239287125" "CV Delta 5.66147724370065"
[1] "CV Delta 0.00488514917060012"
# Pool size + region_50km_ssm + realm
test_model(merlin_city_data_fixed_no_boreal, response ~ merlin_pool_size + region_50km_ssm + realm)
[1] "R2 0.359678173824136"
[1] "CV Delta 13.2441153115841" "CV Delta 13.2375767587302"
[1] "CV Delta 0.00653855285384175"
test_model(birdlife_city_data_fixed_no_boreal, response ~ birdlife_pool_size + region_50km_ssm + realm)
[1] "R2 0.226422719550459"
[1] "CV Delta 5.38608965982801" "CV Delta 5.38414802481297"
[1] "CV Delta 0.00194163501504718"
# Pool size + region_50km_ssm + biome
test_model(merlin_city_data_fixed_no_boreal, response ~ merlin_pool_size + region_50km_ssm + biome_name)
[1] "R2 0.370916016196228"
[1] "CV Delta 13.5692480406954" "CV Delta 13.5602234710036"
[1] "CV Delta 0.00902456969186893"
test_model(birdlife_city_data_fixed_no_boreal, response ~ birdlife_pool_size + region_50km_ssm + biome_name)
[1] "R2 0.229572509441164"
[1] "CV Delta 5.99492133909034" "CV Delta 5.99012318220011"
[1] "CV Delta 0.00479815689022445"
# AIC table for the candidate Merlin models on the no-boreal data. The argument
# order inside each glm() call is kept as written because AIC() labels its rows
# with the deparsed call.
AIC(
  glm(data = merlin_city_data_fixed_no_boreal, formula = response ~ merlin_pool_size + realm),
  glm(data = merlin_city_data_fixed_no_boreal, formula = response ~ merlin_pool_size + biome_name),
  glm(data = merlin_city_data_fixed_no_boreal, formula = response ~ merlin_pool_size + biome_name + realm),
  glm(data = merlin_city_data_fixed_no_boreal, formula = response ~ merlin_pool_size + population_growth),
  glm(data = merlin_city_data_fixed_no_boreal, formula = response ~ merlin_pool_size + region_50km_ssm),
  glm(data = merlin_city_data_fixed_no_boreal, formula = response ~ merlin_pool_size + population_growth + region_50km_ssm),
  glm(data = merlin_city_data_fixed_no_boreal, formula = response ~ merlin_pool_size + population_growth + region_50km_ssm + biome_name + realm),
  glm(data = merlin_city_data_fixed_no_boreal, formula = response ~ merlin_pool_size + region_50km_ssm + biome_name + realm),
  glm(data = merlin_city_data_fixed_no_boreal, formula = response ~ merlin_pool_size + region_50km_ssm + biome_name),
  glm(data = merlin_city_data_fixed_no_boreal, formula = response ~ merlin_pool_size + region_50km_ssm + realm)
)
# Same candidate set for Birdlife. The last three models were cross-validated
# with test_model() but were missing from this comparison; they are included so
# both pools are ranked over identical right-hand sides.
AIC(
  glm(data = birdlife_city_data_fixed_no_boreal, formula = response ~ birdlife_pool_size + realm),
  glm(data = birdlife_city_data_fixed_no_boreal, formula = response ~ birdlife_pool_size + biome_name),
  glm(data = birdlife_city_data_fixed_no_boreal, formula = response ~ birdlife_pool_size + biome_name + realm),
  glm(data = birdlife_city_data_fixed_no_boreal, formula = response ~ birdlife_pool_size + population_growth),
  glm(data = birdlife_city_data_fixed_no_boreal, formula = response ~ birdlife_pool_size + region_50km_ssm),
  glm(data = birdlife_city_data_fixed_no_boreal, formula = response ~ birdlife_pool_size + population_growth + region_50km_ssm),
  glm(data = birdlife_city_data_fixed_no_boreal, formula = response ~ birdlife_pool_size + population_growth + region_50km_ssm + biome_name + realm),
  glm(data = birdlife_city_data_fixed_no_boreal, formula = response ~ birdlife_pool_size + region_50km_ssm + biome_name + realm),
  glm(data = birdlife_city_data_fixed_no_boreal, formula = response ~ birdlife_pool_size + region_50km_ssm + biome_name),
  glm(data = birdlife_city_data_fixed_no_boreal, formula = response ~ birdlife_pool_size + region_50km_ssm + realm)
)
# Fit the five-term gaussian GLM for each pool on the complete *_fixed data
# (boreal rows included) and draw the default glm diagnostic plots.
merlin.fit <- glm(
  response ~ merlin_pool_size + population_growth + region_50km_ssm + biome_name + realm,
  data = merlin_city_data_fixed
)
plot(merlin.fit)
Warning: not plotting observations with leverage one:
113
birdlife.fit <- glm(
  response ~ birdlife_pool_size + population_growth + region_50km_ssm + biome_name + realm,
  data = birdlife_city_data_fixed
)
plot(birdlife.fit)
Warning: not plotting observations with leverage one:
113
But can we order cities based on how good they are for biodiversity?
# Fetch the Merlin data again with the second argument switched on (the
# definition at the top of the file calls it include_city_name). Spelled TRUE
# because T is an ordinary variable that can be reassigned.
merlin_city_data_named <- fetch_city_data_for('merlin', TRUE)
── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
name = col_character(),
response = col_double()
)
Joining, by = "name"
# Fetch the Birdlife data again with the second argument switched on (the
# definition at the top of the file calls it include_city_name). Spelled TRUE
# because T is an ordinary variable that can be reassigned.
birdlife_city_data_named <- fetch_city_data_for('birdlife', TRUE)
── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
name = col_character(),
response = col_double()
)
Joining, by = "name"
# Keep each full model's residuals next to the rows it was fitted on, then plot
# and correlate the raw response against those residuals.
merlin_city_data_fixed$residuals <- residuals(merlin.fit)
birdlife_city_data_fixed$residuals <- residuals(birdlife.fit)
ggplot(merlin_city_data_fixed, aes(x = residuals, y = response)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
`geom_smooth()` using formula 'y ~ x'
with(merlin_city_data_fixed, cor(response, residuals))
[1] 0.7665527
ggplot(birdlife_city_data_fixed, aes(x = residuals, y = response)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
`geom_smooth()` using formula 'y ~ x'
with(birdlife_city_data_fixed, cor(response, residuals))
[1] 0.8398848
# Rank cities from best to worst by (a) raw response and (b) residual of the
# full GLM, separately for each pool, and save the ranking.
#
# Residuals are read from the *_fixed frames, which is where resid(merlin.fit)
# and resid(birdlife.fit) are stored; Birdlife columns index the Birdlife name
# vector. seq_len() replaces 1:nrow() so an empty table cannot produce c(1, 0).
# NOTE(review): assumes the *_named, *_fixed and unsuffixed frames share the same
# row order (the file itself copies names from *_named onto *_fixed) - confirm.
ordered_cities <- data.frame(
  ranked_performance = seq_len(nrow(merlin_city_data_named)),
  merlin_base_response = merlin_city_data_named$name[order(-merlin_city_data$response)],
  birdlife_base_response = birdlife_city_data_named$name[order(-birdlife_city_data$response)],
  merlin_model_residuals = merlin_city_data_named$name[order(-merlin_city_data_fixed$residuals)],
  birdlife_model_residuals = birdlife_city_data_named$name[order(-birdlife_city_data_fixed$residuals)]
)
ordered_cities
write_csv(ordered_cities, "city_effect_residuals.csv")
What is going on with the response?
library(ggrepel)
# Labelled scatter of response against pool size for each pool, with an lm trend
# line. Point colour shows the `residuals` column, which holds the residuals of
# the full GLM (merlin.fit / birdlife.fit), so the legend title names that
# model rather than a pool-size-only one.
# NOTE(review): if the colour was meant to show residuals of a pool-size-only
# fit, recompute the column instead of relabelling - confirm intent.
merlin_city_data_fixed$name <- merlin_city_data_named$name
plot_merlin_poolsize <- ggplot(merlin_city_data_fixed, aes(y = response, x = merlin_pool_size)) +
  geom_smooth(method = "lm", se = FALSE) +
  geom_point(aes(color = residuals), size = 4) +
  geom_label_repel(aes(label = name), size = 4) +
  xlab("Pool Size") + ylab("City Random Effect Response") +
  guides(color = guide_legend(title = "Model residuals 'response ~ pool_size + population_growth + region_50km_ssm + biome_name + realm'")) +
  theme_bw() +
  theme(
    legend.position = "bottom",
    legend.title = element_text(size = 9),
    legend.text = element_text(size = 8),
    legend.key.size = unit(1, "line")
  ) +
  labs(title = "Merlin response given pool size")
plot_merlin_poolsize
birdlife_city_data_fixed$name <- birdlife_city_data_named$name
plot_birdlife_poolsize <- ggplot(birdlife_city_data_fixed, aes(y = response, x = birdlife_pool_size)) +
  geom_smooth(method = "lm", se = FALSE) +
  geom_point(aes(color = residuals), size = 4) +
  geom_label_repel(aes(label = name), size = 4) +
  xlab("Pool Size") + ylab("City Random Effect Response") +
  guides(color = guide_legend(title = "Model residuals 'response ~ pool_size + population_growth + region_50km_ssm + biome_name + realm'")) +
  theme_bw() +
  theme(
    legend.position = "bottom",
    legend.title = element_text(size = 9),
    legend.text = element_text(size = 8),
    legend.key.size = unit(1, "line")
  ) +
  labs(title = "Birdlife response given pool size")
plot_birdlife_poolsize
Summary of models
# Print the coefficient table and fit statistics of the full Merlin GLM.
summary(merlin.fit)
Call:
glm(formula = response ~ merlin_pool_size + population_growth +
region_50km_ssm + biome_name + realm, data = merlin_city_data_fixed)
Deviance Residuals:
Min 1Q Median 3Q Max
-7.5836 -1.9802 -0.2806 1.4883 16.1666
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.558383 4.200653 0.609 0.5437
merlin_pool_size -0.027339 0.003575 -7.647 6.55e-12 ***
population_growth 0.003068 0.005114 0.600 0.5497
region_50km_ssm -0.053173 0.072982 -0.729 0.4677
biome_nameDeserts & Xeric Shrublands 4.605968 3.868850 1.191 0.2363
biome_nameFlooded Grasslands & Savannas 0.525908 4.481070 0.117 0.9068
biome_nameMangroves 8.441618 4.591210 1.839 0.0685 .
biome_nameMediterranean Forests, Woodlands & Scrub 4.145677 3.732373 1.111 0.2690
biome_nameMontane Grasslands & Shrublands 5.023979 4.774921 1.052 0.2949
biome_nameTemperate Broadleaf & Mixed Forests 4.686888 3.622288 1.294 0.1983
biome_nameTemperate Conifer Forests 4.317564 4.479751 0.964 0.3372
biome_nameTemperate Grasslands, Savannas & Shrublands 5.637210 4.037582 1.396 0.1653
biome_nameTropical & Subtropical Coniferous Forests 7.544896 4.609955 1.637 0.1044
biome_nameTropical & Subtropical Dry Broadleaf Forests 4.834888 3.977950 1.215 0.2267
biome_nameTropical & Subtropical Grasslands, Savannas & Shrublands 7.209246 4.186447 1.722 0.0877 .
biome_nameTropical & Subtropical Moist Broadleaf Forests 4.084507 3.830228 1.066 0.2885
realmAustralasia -0.633465 2.622994 -0.242 0.8096
realmIndomalayan 1.301503 1.655451 0.786 0.4334
realmNearctic 2.083151 1.879997 1.108 0.2701
realmNeotropic 2.585444 1.767718 1.463 0.1463
realmPalearctic -0.323305 1.843458 -0.175 0.8611
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for gaussian family taken to be 12.50991)
Null deviance: 2469.6 on 136 degrees of freedom
Residual deviance: 1451.1 on 116 degrees of freedom
AIC: 756.13
Number of Fisher Scoring iterations: 2
# Print the coefficient table and fit statistics of the full Birdlife GLM.
summary(birdlife.fit)
Call:
glm(formula = response ~ birdlife_pool_size + population_growth +
region_50km_ssm + biome_name + realm, data = birdlife_city_data_fixed)
Deviance Residuals:
Min 1Q Median 3Q Max
-5.1697 -1.2864 -0.2075 0.8359 9.4606
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.639e+00 2.811e+00 1.295 0.1980
birdlife_pool_size -1.298e-02 2.705e-03 -4.798 4.82e-06 ***
population_growth -7.172e-05 3.334e-03 -0.022 0.9829
region_50km_ssm -4.483e-02 4.664e-02 -0.961 0.3385
biome_nameDeserts & Xeric Shrublands 3.037e+00 2.499e+00 1.215 0.2267
biome_nameFlooded Grasslands & Savannas 5.831e-01 2.904e+00 0.201 0.8412
biome_nameMangroves 3.282e+00 2.980e+00 1.101 0.2730
biome_nameMediterranean Forests, Woodlands & Scrub 2.506e+00 2.415e+00 1.038 0.3015
biome_nameMontane Grasslands & Shrublands 2.011e+00 3.094e+00 0.650 0.5170
biome_nameTemperate Broadleaf & Mixed Forests 3.067e+00 2.345e+00 1.308 0.1934
biome_nameTemperate Conifer Forests 4.456e+00 2.907e+00 1.533 0.1280
biome_nameTemperate Grasslands, Savannas & Shrublands 4.342e+00 2.616e+00 1.660 0.0996 .
biome_nameTropical & Subtropical Coniferous Forests 3.878e+00 2.988e+00 1.298 0.1969
biome_nameTropical & Subtropical Dry Broadleaf Forests 3.043e+00 2.570e+00 1.184 0.2389
biome_nameTropical & Subtropical Grasslands, Savannas & Shrublands 1.675e+00 2.724e+00 0.615 0.5400
biome_nameTropical & Subtropical Moist Broadleaf Forests 1.819e+00 2.482e+00 0.733 0.4651
realmAustralasia -1.882e+00 1.700e+00 -1.107 0.2706
realmIndomalayan -8.270e-01 1.076e+00 -0.769 0.4436
realmNearctic -2.992e+00 1.228e+00 -2.437 0.0163 *
realmNeotropic -6.657e-01 1.143e+00 -0.582 0.5615
realmPalearctic -2.961e+00 1.225e+00 -2.417 0.0172 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for gaussian family taken to be 5.26285)
Null deviance: 865.45 on 136 degrees of freedom
Residual deviance: 610.49 on 116 degrees of freedom
AIC: 637.51
Number of Fisher Scoring iterations: 2
Review ANOVAs
# One-way ANOVA of the city response against biome, for each pool.
birdlife.biome.anovoa <- aov(response ~ biome_name, data=birdlife_city_data_fixed)
summary(birdlife.biome.anovoa)
Df Sum Sq Mean Sq F value Pr(>F)
biome_name 12 98.7 8.225 1.33 0.21
Residuals 124 766.7 6.183
merlin.biome.anovoa <- aov(response ~ biome_name, data=merlin_city_data_fixed)
summary(merlin.biome.anovoa)
Df Sum Sq Mean Sq F value Pr(>F)
biome_name 12 212.3 17.69 0.972 0.479
Residuals 124 2257.3 18.20
# One-way ANOVA of the city response against realm, for each pool.
# NOTE(review): both recorded tables show a realm sum of squares of 0 (F = 0,
# p = 1), i.e. no between-realm variation in `response` at all - confirm this is
# expected from how `response` is produced upstream.
birdlife.realm.anovoa <- aov(response ~ realm, data=birdlife_city_data_fixed)
summary(birdlife.realm.anovoa)
Df Sum Sq Mean Sq F value Pr(>F)
realm 5 0.0 0.000 0 1
Residuals 131 865.4 6.606
merlin.realm.anovoa <- aov(response ~ realm, data=merlin_city_data_fixed)
summary(merlin.realm.anovoa)
Df Sum Sq Mean Sq F value Pr(>F)
realm 5 0 0.00 0 1
Residuals 131 2470 18.85
# Merlin only: interaction plot of response by realm (x axis) and biome
# (trace), followed by the additive two-way ANOVA.
interaction.plot(merlin_city_data_fixed$realm, merlin_city_data_fixed$biome_name, merlin_city_data_fixed$response)
meriin.addative.anova <- aov(response ~ biome_name + realm, data=merlin_city_data_fixed)
summary(meriin.addative.anova)
Df Sum Sq Mean Sq F value Pr(>F)
biome_name 12 212.3 17.692 0.938 0.511
realm 5 13.9 2.785 0.148 0.980
Residuals 119 2243.4 18.852
# Same interaction plot as above (identical arguments), then the two-way ANOVA
# with the biome_name:realm interaction term.
interaction.plot(merlin_city_data_fixed$realm, merlin_city_data_fixed$biome_name, merlin_city_data_fixed$response)
meriin.interaction.anova <- aov(response ~ biome_name * realm, data=merlin_city_data_fixed)
summary(meriin.interaction.anova)
Df Sum Sq Mean Sq F value Pr(>F)
biome_name 12 212.3 17.692 0.890 0.559
realm 5 13.9 2.785 0.140 0.983
biome_name:realm 13 136.3 10.487 0.528 0.903
Residuals 106 2107.1 19.878
Does the autocorrelation go away if we leave out pool size?
– Merlin
# Rebuild the Merlin data set for the "no pool size" analysis.
# NOTE(review): the fetch_city_data_for() definition at the top of this file
# declares only `pool_name` and `include_city_name`; confirm a later definition
# accepts `include_pool_size`, otherwise this call stops with an unused-argument
# error.
merlin_city_data_no_pool_size <- fetch_city_data_for('merlin', include_pool_size = F)
# Replace the frame with the result of rfImpute() using `response` as the outcome
# (presumably randomForest::rfImpute filling missing predictors - confirm).
merlin_city_data_no_pool_size <- rfImpute(response ~ ., merlin_city_data_no_pool_size)
# Print the prepared frame for inspection.
merlin_city_data_no_pool_size
select_variables_from_random_forest(merlin_city_data_no_pool_size)
# Despite its name this is a keep-mask: TRUE for every column whose name is NOT
# returned by select_scales().
# NOTE(review): cultivated = 30 and susm = 200 do not match the 20/50/100 km
# scales that appear in the column names - confirm select_scales() treats these
# values as intended.
exclude_merlin_2 <- !names(merlin_city_data_no_pool_size) %in% select_scales(urban = 20, cultivated = 30, elevation_delta = 50, mean_elevation = 100, average_pop_density = 50, includes_estuary = NA, ssm = 50, susm = 200, ndvi = 50, percentage_protected = 50)
# Keep only the columns flagged TRUE above and rerun the variable selection.
merlin_city_data_no_pool_size_single_scale <- merlin_city_data_no_pool_size[,exclude_merlin_2]
select_variables_from_random_forest(merlin_city_data_no_pool_size_single_scale)
# Merlin sweep without pool size: start from `response` plus one column and
# append one more column before every further create_fifty_rows_of_oob() call.
merlin_no_pool_columns <- c("response", "region_50km_ssm")
create_fifty_rows_of_oob(merlin_city_data_no_pool_size[, merlin_no_pool_columns])
merlin_no_pool_columns <- c(merlin_no_pool_columns, "region_50km_elevation_delta")
create_fifty_rows_of_oob(merlin_city_data_no_pool_size[, merlin_no_pool_columns])
merlin_no_pool_columns <- c(merlin_no_pool_columns, "permanent_water")
create_fifty_rows_of_oob(merlin_city_data_no_pool_size[, merlin_no_pool_columns])
merlin_no_pool_columns <- c(merlin_no_pool_columns, "biome_name")
create_fifty_rows_of_oob(merlin_city_data_no_pool_size[, merlin_no_pool_columns])
merlin_no_pool_columns <- c(merlin_no_pool_columns, "city_ndvi")
create_fifty_rows_of_oob(merlin_city_data_no_pool_size[, merlin_no_pool_columns])
merlin_no_pool_columns <- c(merlin_no_pool_columns, "temperature_annual_average")
create_fifty_rows_of_oob(merlin_city_data_no_pool_size[, merlin_no_pool_columns])
merlin_no_pool_columns <- c(merlin_no_pool_columns, "shrubs")
create_fifty_rows_of_oob(merlin_city_data_no_pool_size[, merlin_no_pool_columns])
merlin_no_pool_columns <- c(merlin_no_pool_columns, "temperature_monthly_min")
create_fifty_rows_of_oob(merlin_city_data_no_pool_size[, merlin_no_pool_columns])
merlin_no_pool_columns <- c(merlin_no_pool_columns, "region_20km_urban")
create_fifty_rows_of_oob(merlin_city_data_no_pool_size[, merlin_no_pool_columns])
merlin_no_pool_columns <- c(merlin_no_pool_columns, "herbaceous_wetland")
create_fifty_rows_of_oob(merlin_city_data_no_pool_size[, merlin_no_pool_columns])
merlin_no_pool_columns <- c(merlin_no_pool_columns, "city_max_pop_density")
create_fifty_rows_of_oob(merlin_city_data_no_pool_size[, merlin_no_pool_columns])
merlin_no_pool_columns <- c(merlin_no_pool_columns, "city_gdp_per_population")
create_fifty_rows_of_oob(merlin_city_data_no_pool_size[, merlin_no_pool_columns])
merlin_no_pool_columns <- c(merlin_no_pool_columns, "city_ssm")
create_fifty_rows_of_oob(merlin_city_data_no_pool_size[, merlin_no_pool_columns])
merlin_no_pool_columns <- c(merlin_no_pool_columns, "city_average_pop_density")
create_fifty_rows_of_oob(merlin_city_data_no_pool_size[, merlin_no_pool_columns])
merlin_no_pool_columns <- c(merlin_no_pool_columns, "region_50km_average_pop_density")
create_fifty_rows_of_oob(merlin_city_data_no_pool_size[, merlin_no_pool_columns])
merlin_no_pool_columns <- c(merlin_no_pool_columns, "region_50km_percentage_protected")
create_fifty_rows_of_oob(merlin_city_data_no_pool_size[, merlin_no_pool_columns])
merlin_no_pool_columns <- c(merlin_no_pool_columns, "temperature_monthly_max")
create_fifty_rows_of_oob(merlin_city_data_no_pool_size[, merlin_no_pool_columns])
merlin_no_pool_columns <- c(merlin_no_pool_columns, "happiness_negative_effect")
create_fifty_rows_of_oob(merlin_city_data_no_pool_size[, merlin_no_pool_columns])
merlin_no_pool_columns <- c(merlin_no_pool_columns, "cultivated")
create_fifty_rows_of_oob(merlin_city_data_no_pool_size[, merlin_no_pool_columns])
merlin_no_pool_columns <- c(merlin_no_pool_columns, "mean_population_exposure_to_pm2_5_2019")
create_fifty_rows_of_oob(merlin_city_data_no_pool_size[, merlin_no_pool_columns])
merlin_no_pool_columns <- c(merlin_no_pool_columns, "realm")
create_fifty_rows_of_oob(merlin_city_data_no_pool_size[, merlin_no_pool_columns])
Merlin (without pool size): “region_50km_ssm”, “region_50km_elevation_delta”, “permanent_water”, “biome_name”, “city_ndvi”
# Fit the Merlin GLM without pool size, draw its diagnostics, report
# 1 - deviance / null.deviance, then plot and correlate response against the
# model residuals.
merlin.fit3 <- glm(
  response ~ region_50km_ssm + region_50km_elevation_delta + permanent_water + biome_name + city_ndvi,
  data = merlin_city_data_no_pool_size
)
plot(merlin.fit3)
Warning: not plotting observations with leverage one:
113
1 - merlin.fit3$deviance / merlin.fit3$null.deviance
[1] 0.1479997
merlin_city_data_no_pool_size$residuals <- residuals(merlin.fit3)
ggplot(merlin_city_data_no_pool_size, aes(y = response, x = residuals)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
`geom_smooth()` using formula 'y ~ x'
with(merlin_city_data_no_pool_size, cor(response, residuals))